rm(list = ls())
gc()
set.seed(63296)
packages <-c("readstata13","ggplot2","furrr","future")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages)

setwd("PUT YOUR WORKING DIRECTORY HERE")
setwd("/Users/hagerans/Dropbox/Research/Polish_priests/8_Replicationn/replication")


future::plan("multisession")

## load data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

# load covariates, including spy priests
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas", "Frac48", "coal_sqm", "minerals", "partition", "priests_continuous", "priests_relocated", "pop")], by = c("locality"), all.x = T)
data$region_numeric <- as.numeric(data$region)


###############################
### REGRESSIONS FOR STRIKES ###
###############################

#subset to years with observations:
data_t <- data[which(data$year>1979),]

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# create industrialization index
data_t$industrialization_index <- scale(data_t$minerals)

# create grievance index
data_t$grievance_index <- scale(data_t$coal_sqm)


#### OLS when averaging over years
data_agg <- aggregate(data_t[,c("strikes", "pop", "region_numeric", "commanders", "wealth_index", "state_capacity_index", "ethnic_diversity_index",
                                "Russian_occupation_index", "industrialization_index", "grievance_index")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)
m_strikes_ols_averaged <- lm(scale(strikes) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_agg)



###############################
### RANDOMIZATION INFERENCE ###
###############################

randomization <- function(reps){
  rep <- function(x){
    data_temp <- data_agg
    data_temp$commanders <- sample(data_agg$commanders, 297, replace = F)
    estimate <- summary(lm(scale(strikes) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_temp))$coefficients[2]
    return(estimate)
  }
  out <- furrr::future_map_dbl(1:reps, ~rep(.x), .progress = TRUE)
  out <- as.data.frame(out)
  return(out)
}

store_d <- randomization(reps = 10000)
  
ggplot(data=store_d, aes(out)) + 
  geom_histogram(bins = 300) +
  xlim(c(-0.1,0.1)) +
  geom_vline(xintercept = summary(lm(scale(strikes) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + grievance_index + Russian_occupation_index + industrialization_index + factor(region_numeric) , data = data_agg))$coefficients[2], 
             linetype= 2, 
               color = "red", size=0.7) +
  theme_classic() +
  labs(title="") +
  labs(x="Estimate", y="Count") +
  annotate(geom="text", x=summary(lm(scale(strikes) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + grievance_index + industrialization_index + factor(region_numeric) , data = data_agg))$coefficients[2]-0.02, 
           y=300, label="P-value: \n0.015",
             color="red", fontsize=0.7)
# p-value
sum(store_d$out > summary(lm(scale(strikes) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + grievance_index + industrialization_index + factor(region_numeric) , data = data_agg))$coefficients[2])/length(store_d$out)


ggsave("PUT YOUR DIRECTORY HERE", width = 4, height = 3)


################################
### REGRESSIONS FOR SABOTAGE ###
################################


# subset to years with observations:
data_t <- data[which(data$year<1980),]

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# create industrialization index
data_t$industrialization_index <- scale(data_t$minerals)

# create grievance index
data_t$grievance_index <- scale(data_t$coal_sqm)


#### OLS when averaging over years
data_agg <- aggregate(data_t[,c("subbotnik_inkind_", "pop", "region_numeric", "commanders", "wealth_index", "state_capacity_index", "ethnic_diversity_index",
                                "Russian_occupation_index", "industrialization_index", "grievance_index")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)
m_strikes_ols_averaged <- lm(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_agg)
summary(m_strikes_ols_averaged)


###############################
### RANDOMIZATION INFERENCE ###
###############################

randomization <- function(reps){
  rep <- function(x){
    data_temp <- data_agg
    data_temp$commanders <- sample(data_agg$commanders, 297, replace = F)
    estimate <- summary(lm(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_temp))$coefficients[2]
    return(estimate)
  }
  out <- furrr::future_map_dbl(1:reps, ~rep(.x), .progress = TRUE)
  out <- as.data.frame(out)
  return(out)
}

store_d <- randomization(reps = 10000)


ggplot(data=store_d, aes(out)) + 
  geom_histogram(bins = 300) +
  xlim(c(-0.1,0.1)) +
  geom_vline(xintercept = summary(lm(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_agg))$coefficients[2]*-1, 
             linetype= 2, 
             color = "red", size=0.7) +
  theme_classic() +
  labs(title="") +
  labs(x="Estimate", y="Count") +
  annotate(geom="text", x=summary(lm(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_agg))$coefficients[2]*-1-0.02, 
           y=300, label="P-value: \n0.017",
           color="red", fontsize=0.7)
# p-value
sum(store_d$out > summary(lm(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance_index + factor(region_numeric) , data = data_agg))$coefficients[2])/length(store_d$out)


ggsave("PUT YOUR WORKING DIRECTORY HERE", width = 4, height = 3)

